Notes#
import os
from git import Repo
import dimcat as dc
import ms3
import pandas as pd
import plotly.express as px
from utils import CORPUS_COLOR_SCALE, STD_LAYOUT
# Folder containing the corpora: the CORPUS_PATH environment variable, if set,
# takes precedence over the default location.
CORPUS_PATH = os.getenv('CORPUS_PATH', "~/dcml_corpora")
CORPUS_PATH
'~/dcml_corpora'
# Open the data repository and the repository that contains this notebook
# (the latter is searched for in the parent directories of the working directory).
repo = Repo(CORPUS_PATH)
notebook_repo = Repo('.', search_parent_directories=True)
notebook_repo_path = notebook_repo.git.rev_parse("--show-toplevel")
# normpath() removes a trailing path separator, which would otherwise make
# basename() return an empty string for a value such as "~/dcml_corpora/".
data_repo_name = os.path.basename(os.path.normpath(CORPUS_PATH))
# Report the abbreviated commit hashes and the library versions used for this run.
print(f"Notebook repository '{os.path.basename(notebook_repo_path)}' @ {notebook_repo.commit().hexsha[:7]}")
print(f"Data repo '{data_repo_name}' @ {repo.commit().hexsha[:7]}")
print(f"dimcat version {dc.__version__}")
print(f"ms3 version {ms3.__version__}")
Notebook repository 'notebooks' @ f478afd
Data repo 'dcml_corpora' @ 3612b3b
dimcat version 0.3.0.post1.dev104+g9c57474
ms3 version 1.2.4.post0.dev2+gf30d960
Data loading#
# Load the selected corpora (sub-folders of CORPUS_PATH) into a single dataset.
dataset = dc.Dataset()
for folder in ['corelli', 'liszt_pelerinage']:
    print("Loading", folder)
    path = os.path.join(CORPUS_PATH, folder)
    dataset.load(directory=path)
# Display the overview of the loaded data.
dataset.data
Loading corelli
Loading liszt_pelerinage
[default|all]
All corpora
-----------
View: This view is called 'default'. It
- excludes fnames that are not contained in the metadata,
- filters out file extensions requiring conversion (such as .xml), and
- excludes review files and folders.
has active scores measures notes expanded
metadata view detected detected parsed detected parsed detected parsed
corpus
corelli yes default 149 149 149 149 149 149 149
liszt_pelerinage yes default 19 19 19 19 19 19 19
1191/3375 files are excluded from this view.
1176 files have been excluded based on their subdir.
15 files have been excluded based on their file name.
# Metadata table of the loaded data (described below as the concatenated 'metadata.tsv' files).
all_metadata = dataset.data.metadata()
print(f"Concatenated 'metadata.tsv' files cover {len(all_metadata)} of the {dataset.data.n_pieces} scores.")
# Show the first row of every group of the first index level as an example.
all_metadata.groupby(level=0).nth(0)
Concatenated 'metadata.tsv' files cover 168 of the 168 scores.
| TimeSig | KeySig | last_mc | last_mn | length_qb | last_mc_unfolded | last_mn_unfolded | length_qb_unfolded | volta_mcs | all_notes_qb | ... | staff_4_instrument | score_integrity | composed_source | lyricist_text | imslp | musicbrainz | viaf | wikidata | typesetter | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | |||||||||||||||||||||
| corelli | 1: 4/4 | 1: -1 | 14 | 14 | 56.0 | 14 | 14 | 56.0 | NaN | 224.00 | ... | Keyboard | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| liszt_pelerinage | 1: 4/4 | 1: 0 | 97 | 97 | 388.0 | 97 | 97 | 388.0 | NaN | 1902.42 | ... | NaN | Tom Schreyer | OxfordMusicOnline | NaN | https://imslp.org/wiki/Ann%C3%A9es_de_p%C3%A8l... | https://musicbrainz.org/work/5804701d-54a6-4c9... | https://viaf.org/viaf/179020308/ | https://www.wikidata.org/wiki/Q567462 | https://imslp.org/wiki/Special:ReverseLookup/1... | NaN |
2 rows × 65 columns
# Pass the dataset through dimcat's IsAnnotatedFilter
# (presumably this keeps only annotated pieces -- confirm against the dimcat documentation).
annotated = dc.IsAnnotatedFilter().process_data(dataset)
# Compare the number of IDs stored under the key `()` of `indices` before and after filtering.
print(f"Before: {len(dataset.indices[()])} IDs, after filtering: {len(annotated.indices[()])}")
Before: 168 IDs, after filtering: 168
Choose here whether you want to see stats for all scores or only for the annotated ones.
# Exactly one of the two assignments should be active: move the comment marker
# to switch between the complete dataset and the filtered one.
#selected = dataset
selected = annotated
Compute chronological order
# Keep only the metadata rows of pieces that contain at least one label.
# NOTE(review): this filter is applied no matter which dataset `selected` points to -- confirm this is intended.
summary = all_metadata[all_metadata.label_count > 0]
# Count the selected IDs by adding up the group sizes; this avoids concatenating
# all groups into one throw-away list just to take its length.
n_selected = sum(len(ixs) for _, ixs in selected.iter_groups())
print(f"Selected metadata rows cover {len(summary)} of the {n_selected} scores.")
# Sort the corpora by the mean `composed_end` value of their pieces (converted to integers).
mean_composition_years = summary.groupby(level=0).composed_end.mean().astype(int).sort_values()
chronological_order = mean_composition_years.index.to_list()
# Hand out the colours in chronological order; zip() stops at the shorter of the two sequences.
dataset_colors = dict(zip(chronological_order, CORPUS_COLOR_SCALE))
chronological_order
Selected metadata rows cover 168 of the 168 scores.
['corelli', 'liszt_pelerinage']
# Retrieve the 'notes' facet of the selected data.
all_notes = selected.get_facet('notes')
# Every row is counted as one note, every group of the first two index levels as one file.
print(f"{len(all_notes.index)} notes over {len(all_notes.groupby(level=[0,1]))} files.")
all_notes.head()
129856 notes over 168 files.
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | duration | nominal_duration | scalar | tied | tpc | midi | name | octave | chord_id | tremolo | gracenote | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | interval | ||||||||||||||||||||
| corelli | op01n01a | [0.0, 1.0) | 1 | 1 | 0 | 1.0 | 0 | 0 | 4/4 | 3 | 1 | 1/4 | 1/4 | 1 | <NA> | -1 | 53 | F3 | 3 | 8 | NaN | NaN |
| [0.0, 1.0) | 1 | 1 | 0 | 1.0 | 0 | 0 | 4/4 | 4 | 1 | 1/4 | 1/4 | 1 | <NA> | -1 | 53 | F3 | 3 | 14 | NaN | NaN | ||
| [0.0, 1.0) | 1 | 1 | 0 | 1.0 | 0 | 0 | 4/4 | 2 | 1 | 1/4 | 1/4 | 1 | <NA> | 3 | 81 | A5 | 5 | 4 | NaN | NaN | ||
| [0.0, 1.0) | 1 | 1 | 0 | 1.0 | 0 | 0 | 4/4 | 1 | 1 | 1/4 | 1/4 | 1 | <NA> | 0 | 84 | C6 | 6 | 0 | NaN | NaN | ||
| [1.0, 2.0) | 1 | 1 | 1 | 1.0 | 1/4 | 1/4 | 4/4 | 3 | 1 | 1/4 | 1/4 | 1 | <NA> | 1 | 55 | G3 | 3 | 9 | NaN | NaN |
def weight_notes(nl, group_col='midi', precise=True):
    """Turn summed note durations into a Series of repeated group values.

    The ``duration_qb`` values of ``nl`` are summed for every value of
    ``group_col`` and expressed as multiples of the smallest positive sum.
    Every group value is then repeated according to its rounded multiple by
    ``repeat_notes_according_to_weights()``.

    Args:
        nl: DataFrame with a ``duration_qb`` column that can be grouped by
            ``group_col``.
        group_col: Name of the column whose values are to be weighted.
        precise: If False, the normalized sums are additionally divided by
            1.9999999, which shortens the result (and the compute time) at
            the cost of precision.

    Returns:
        A pandas Series in which every value of ``group_col`` occurs as often
        as its rounded weight.
    """
    summed_durations = nl.groupby(group_col).duration_qb.sum()
    # Normalize such that the shortest duration results in 1 occurrence.
    # Groups whose durations sum to zero are ignored when looking for the
    # shortest one: dividing by zero would yield inf/NaN weights that cannot
    # be converted to integer counts. Such groups end up with 0 occurrences.
    shortest = summed_durations[summed_durations > 0].min()
    if pd.notna(shortest):
        summed_durations = summed_durations / shortest
    if not precise:
        # This simple trick reduces compute time but also precision:
        # The rationale is to have the smallest value be slightly larger than 0.5 because
        # if it was exactly 0.5 it would be rounded down by repeat_notes_according_to_weights()
        summed_durations = summed_durations / 1.9999999
    return repeat_notes_according_to_weights(summed_durations)
def repeat_notes_according_to_weights(weights):
    """Repeat every index label of ``weights`` as often as its rounded weight.

    Args:
        weights: Series whose index holds the values to repeat (e.g. pitches)
            and whose values are numeric weights.

    Returns:
        A pandas Series with a default integer index in which every index
        label of ``weights`` occurs ``round(weight)`` times, in the order in
        which the labels appear in ``weights``.
    """
    counts = weights.round().astype(int)
    # Series.items() replaces Series.iteritems(), which is deprecated and
    # no longer available in pandas 2.x.
    counts_reflecting_weights = [
        pitch for pitch, count in counts.items() for _ in range(count)
    ]
    return pd.Series(counts_reflecting_weights)
# Display names of the corpora, keyed by the corpus names that occur in the first index level.
corpus_names = {
    'corelli': 'Corelli Trio Sonatas',
    'mozart_piano_sonatas': 'Mozart Piano Sonatas',
    'ABC': 'Beethoven String Quartets',
    'beethoven_piano_sonatas': 'Beethoven Sonatas',
    'chopin_mazurkas': 'Chopin Mazurkas',
    'debussy_suite_bergamasque': 'Debussy Suite',
    'dvorak_silhouettes': "Dvořák Silhouettes",
    'grieg_lyric_pieces': "Grieg Lyric Pieces",
    'liszt_pelerinage': "Liszt Années",
    'medtner_tales': "Medtner Tales",
    'schumann_kinderszenen': "Schumann Kinderszenen",
    'tchaikovsky_seasons': "Tchaikovsky Seasons",
}
# Translate corpus names into display names. A corpus without an entry in
# `corpus_names` keeps its own name: a plain lookup would raise a KeyError here,
# and mapping the index through the dict would yield NaN, whose rows the
# groupby below would drop.
dataset_name_colors = {corpus_names.get(corp, corp): color for corp, color in dataset_colors.items()}
chronological_corpus_names = [corpus_names.get(corp, corp) for corp in chronological_order]
all_notes['dataset_name'] = all_notes.index.get_level_values(0).map(lambda corp: corpus_names.get(corp, corp))
grouped_notes = all_notes.groupby('dataset_name')
# Weight the MIDI pitches of every dataset separately. Names and results are
# collected in the same pass so that the concat keys always match the frames.
weighted_by_dataset = {name: weight_notes(nl, 'midi', precise=False) for name, nl in grouped_notes}
weighted_midi = pd.concat(list(weighted_by_dataset.values()), keys=list(weighted_by_dataset)).reset_index(level=0)
weighted_midi.columns = ['dataset', 'midi']
weighted_midi
/tmp/ipykernel_55525/2427151401.py:14: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for pitch, count in counts.iteritems():
/tmp/ipykernel_55525/2427151401.py:14: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for pitch, count in counts.iteritems():
| dataset | midi | |
|---|---|---|
| 0 | Corelli Trio Sonatas | 36 |
| 1 | Corelli Trio Sonatas | 36 |
| 2 | Corelli Trio Sonatas | 36 |
| 3 | Corelli Trio Sonatas | 36 |
| 4 | Corelli Trio Sonatas | 36 |
| ... | ... | ... |
| 100505 | Liszt Années | 102 |
| 100506 | Liszt Années | 102 |
| 100507 | Liszt Années | 102 |
| 100508 | Liszt Années | 102 |
| 100509 | Liszt Années | 102 |
116659 rows × 2 columns
# Y-axis ticks: the listed MIDI values are labelled as the Cs from C0 to C7.
yaxis = {
    'tickmode': 'array',
    'tickvals': [12, 24, 36, 48, 60, 72, 84, 96],
    'ticktext': ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7"],
    'gridcolor': 'lightgrey',
}
# One violin (with an inner box plot) per dataset, ordered chronologically.
fig = px.violin(
    weighted_midi,
    x='dataset',
    y='midi',
    color='dataset',
    box=True,
    labels={
        'dataset': '',
        'midi': 'distribution of pitches by duration',
    },
    category_orders={'dataset': chronological_corpus_names},
    color_discrete_map=dataset_name_colors,
    width=1000,
    height=600,
)
fig.update_traces(spanmode='hard')  # do not extend beyond outliers
fig.update_layout(yaxis=yaxis, **STD_LAYOUT, showlegend=False)
fig.show()
# Summed duration per named pitch class (`tpc`) over all notes.
bar_data = all_notes.groupby('tpc').duration_qb.sum().reset_index()
# One tick for every integer between the smallest and the largest occurring tpc,
# labelled with the names returned by ms3.fifths2name().
x_values = list(range(bar_data.tpc.min(), bar_data.tpc.max() + 1))
x_names = ms3.fifths2name(x_values)
fig = px.bar(
    bar_data,
    x='tpc',
    y='duration_qb',
    labels={
        'tpc': 'Named pitch class',
        'duration_qb': 'Duration in quarter notes',
    },
    color_discrete_sequence=CORPUS_COLOR_SCALE,
    width=1000,
    height=300,
)
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
fig.update_xaxes(
    gridcolor='lightgrey',
    zerolinecolor='grey',
    tickmode='array',
    tickvals=x_values,
    ticktext=x_names,
    dtick=1,
    ticks='outside',
    tickcolor='black',
    minor={'dtick': 6, 'gridcolor': 'grey', 'showgrid': True},
)
fig.show()
# Summed duration per dataset and named pitch class (`tpc`).
scatter_data = all_notes.groupby(['dataset_name', 'tpc']).duration_qb.sum().reset_index()
fig = px.scatter(scatter_data, x='tpc', y='duration_qb', color='dataset_name',
                 labels=dict(
                     duration_qb='duration',
                     tpc='named pitch class',
                 ),
                 # category_orders is keyed by column name. The column used for colour and
                 # facets is 'dataset_name'; a 'dataset' key matches no column of
                 # scatter_data, so the chronological order would not be applied.
                 category_orders=dict(dataset_name=chronological_corpus_names),
                 color_discrete_map=dataset_name_colors,
                 facet_col='dataset_name', facet_col_wrap=3, facet_col_spacing=0.03,
                 width=1000, height=500,
                 )
fig.update_traces(mode='lines+markers')
# Reduce the facet titles from "dataset_name=<name>" to "<name>".
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**STD_LAYOUT, showlegend=False)
fig.update_xaxes(gridcolor='lightgrey', zerolinecolor='lightgrey', tickmode='array', tickvals=[-12, -6, 0, 6, 12, 18],
                 ticktext=["Dbb", "Gb", "C", "F#", "B#", "E##"], visible=True, )
# matches=None gives every facet its own y-axis range.
fig.update_yaxes(gridcolor='lightgrey', zeroline=False, matches=None, showticklabels=True)
fig.show()
# Same data as one bar chart, coloured by dataset.
px.bar(scatter_data, x='tpc', y='duration_qb', color='dataset_name',
       labels=dict(
           duration_qb='duration',
           tpc='named pitch class',
       ),
       # category_orders is keyed by column name; the colour column is called
       # 'dataset_name' (a 'dataset' key matches no column of scatter_data).
       category_orders=dict(dataset_name=chronological_corpus_names),
       color_discrete_map=dataset_name_colors,
       width=1000, height=500,
       )
# tpc values from -1 to 5 (both inclusive) are counted as pitch classes without accidental.
without_accidental_mask = bar_data.tpc.between(-1, 5)
no_accidental = bar_data[without_accidental_mask].duration_qb.sum()
with_accidental = bar_data[~without_accidental_mask].duration_qb.sum()
entire = no_accidental + with_accidental
f"Fraction of note duration without accidental of the entire durations: {no_accidental} / {entire} = {no_accidental / entire}"
'Fraction of note duration without accidental of the entire durations: 67269.33143939394 / 98087.32314814815 = 0.685810656059931'
Notes and staves#
# Number of notes per staff over all notes.
print("Distribution of notes over staves:")
all_notes.staff.value_counts()
Distribution of notes over staves:
1 51729
2 43392
3 18602
4 16133
Name: staff, dtype: Int64
print("Distribution of notes over staves for all pieces with more than two staves\n")
# Iterate over the groups of the first two index levels (corpus, fname), i.e. over pieces.
for group, df in all_notes.groupby(level=[0,1]):
    # Report only pieces in which at least one note sits on a staff above 2.
    if (df.staff > 2).any():
        print(group)
        print(df.staff.value_counts().to_dict())
Distribution of notes over staves for all pieces with more than two staves
('corelli', 'op01n01a')
{1: 78, 3: 72, 4: 70, 2: 69}
('corelli', 'op01n01b')
{1: 259, 2: 237, 3: 143, 4: 116}
('corelli', 'op01n01c')
{3: 90, 4: 90, 2: 75, 1: 74}
('corelli', 'op01n01d')
{1: 280, 2: 229, 4: 199, 3: 171}
('corelli', 'op01n02a')
{2: 89, 1: 84, 4: 77, 3: 74}
('corelli', 'op01n02b')
{1: 142, 3: 135, 2: 127, 4: 111}
('corelli', 'op01n02c')
{2: 62, 1: 61, 3: 56, 4: 56}
('corelli', 'op01n02d')
{1: 223, 2: 214, 4: 203, 3: 181}
('corelli', 'op01n03a')
{1: 86, 2: 81, 3: 56, 4: 56}
('corelli', 'op01n03b')
{2: 319, 1: 315, 3: 314, 4: 232}
('corelli', 'op01n03c')
{2: 76, 1: 68, 3: 61}
('corelli', 'op01n03d')
{1: 332, 2: 262, 4: 167, 3: 133}
('corelli', 'op01n04a')
{1: 159, 2: 136, 3: 66, 4: 48}
('corelli', 'op01n04b')
{2: 55, 1: 53, 4: 47, 3: 46}
('corelli', 'op01n04c')
{2: 218, 1: 216, 4: 160, 3: 117}
('corelli', 'op01n04d')
{1: 157, 2: 149, 3: 135, 4: 122}
('corelli', 'op01n05a')
{1: 105, 2: 96, 3: 85, 4: 85}
('corelli', 'op01n05b')
{1: 240, 2: 190, 4: 183, 3: 180}
('corelli', 'op01n05c')
{1: 82, 3: 78, 4: 78, 2: 77}
('corelli', 'op01n05d')
{1: 210, 2: 202, 4: 129, 3: 129}
('corelli', 'op01n06a')
{1: 56, 2: 50, 3: 42, 4: 42}
('corelli', 'op01n06b')
{1: 266, 2: 238, 3: 191, 4: 189}
('corelli', 'op01n06c')
{1: 106, 2: 102, 4: 77, 3: 71}
('corelli', 'op01n06d')
{1: 192, 2: 189, 4: 139, 3: 112}
('corelli', 'op01n07a')
{1: 323, 2: 274, 3: 164, 4: 150}
('corelli', 'op01n07b')
{3: 50, 4: 50, 2: 49, 1: 45}
('corelli', 'op01n07c')
{1: 253, 2: 205, 3: 188, 4: 171}
('corelli', 'op01n08a')
{4: 52, 1: 51, 3: 49, 2: 44}
('corelli', 'op01n08b')
{1: 148, 2: 122, 3: 95, 4: 80}
('corelli', 'op01n08c')
{1: 122, 2: 117, 4: 108, 3: 97}
('corelli', 'op01n08d')
{1: 102, 2: 89, 4: 69, 3: 65}
('corelli', 'op01n09a')
{1: 125, 2: 124, 4: 67, 3: 61}
('corelli', 'op01n09b')
{1: 226, 2: 196, 4: 165, 3: 153}
('corelli', 'op01n09c')
{1: 89, 2: 82, 3: 78, 4: 78}
('corelli', 'op01n09d')
{1: 186, 2: 183, 3: 89, 4: 76}
('corelli', 'op01n10a')
{2: 41, 3: 39, 4: 39, 1: 37}
('corelli', 'op01n10b')
{1: 130, 4: 90, 2: 79, 3: 71}
('corelli', 'op01n10c')
{1: 145, 2: 134, 3: 122, 4: 120}
('corelli', 'op01n10d')
{1: 55, 2: 51, 3: 46, 4: 46}
('corelli', 'op01n10e')
{1: 250, 2: 229, 3: 149, 4: 135}
('corelli', 'op01n11a')
{2: 67, 1: 67, 3: 42, 4: 42}
('corelli', 'op01n11b')
{1: 225, 2: 192, 3: 89, 4: 81}
('corelli', 'op01n11c')
{2: 76, 1: 74, 3: 70, 4: 70}
('corelli', 'op01n11d')
{3: 133, 4: 133, 1: 97, 2: 85}
('corelli', 'op01n12a')
{3: 144, 4: 144, 1: 78, 2: 67}
('corelli', 'op01n12b')
{1: 335, 2: 285, 3: 71, 4: 71}
('corelli', 'op01n12c')
{2: 51, 1: 46, 3: 38, 4: 38}
('corelli', 'op01n12d')
{1: 358, 2: 323, 3: 305, 4: 244}
('corelli', 'op03n01a')
{3: 101, 4: 101, 1: 78, 2: 71}
('corelli', 'op03n01b')
{1: 261, 2: 220, 3: 168, 4: 144}
('corelli', 'op03n01c')
{1: 181, 2: 158, 3: 152, 4: 137}
('corelli', 'op03n01d')
{1: 183, 3: 173, 4: 170, 2: 157}
('corelli', 'op03n02a')
{3: 110, 4: 110, 1: 62, 2: 55}
('corelli', 'op03n02b')
{1: 155, 2: 145, 3: 113, 4: 113}
('corelli', 'op03n02c')
{2: 89, 1: 87, 3: 86, 4: 86}
('corelli', 'op03n02d')
{1: 202, 2: 156, 3: 147, 4: 141}
('corelli', 'op03n03a')
{3: 71, 4: 71, 1: 63, 2: 58}
('corelli', 'op03n03b')
{1: 113, 2: 110, 3: 55, 4: 55}
('corelli', 'op03n03c')
{1: 155, 2: 131, 3: 104, 4: 104}
('corelli', 'op03n03d')
{1: 268, 3: 249, 4: 246, 2: 222}
('corelli', 'op03n04a')
{3: 138, 1: 135, 4: 134, 2: 131}
('corelli', 'op03n04b')
{1: 170, 2: 156, 3: 102, 4: 100}
('corelli', 'op03n04c')
{1: 166, 3: 123, 4: 123, 2: 118}
('corelli', 'op03n04d')
{1: 215, 2: 207, 3: 178, 4: 178}
('corelli', 'op03n05a')
{3: 113, 4: 107, 1: 89, 2: 85}
('corelli', 'op03n05b')
{1: 264, 2: 204, 3: 195, 4: 169}
('corelli', 'op03n05c')
{1: 84, 2: 72, 3: 70, 4: 70}
('corelli', 'op03n05d')
{1: 228, 2: 191, 3: 163, 4: 129}
('corelli', 'op03n06a')
{1: 177, 2: 155, 3: 130, 4: 130}
('corelli', 'op03n06b')
{1: 53, 4: 52, 3: 47, 2: 47}
('corelli', 'op03n06c')
{1: 235, 2: 192, 4: 175, 3: 166}
('corelli', 'op03n06d')
{1: 181, 2: 160, 3: 131, 4: 131}
('corelli', 'op03n07a')
{4: 66, 1: 62, 3: 60, 2: 52}
('corelli', 'op03n07b')
{4: 135, 1: 126, 3: 112, 2: 103}
('corelli', 'op03n07c')
{1: 93, 3: 84, 4: 84, 2: 78}
('corelli', 'op03n07d')
{1: 139, 2: 108, 3: 107, 4: 107}
('corelli', 'op03n08a')
{1: 122, 3: 119, 4: 119, 2: 106}
('corelli', 'op03n08b')
{1: 266, 2: 232, 3: 188, 4: 186}
('corelli', 'op03n08c')
{3: 76, 4: 75, 2: 73, 1: 68}
('corelli', 'op03n08d')
{1: 331, 2: 323, 3: 268, 4: 232}
('corelli', 'op03n09a')
{1: 69, 2: 61, 3: 59, 4: 59}
('corelli', 'op03n09b')
{1: 127, 2: 115, 3: 113, 4: 111}
('corelli', 'op03n09c')
{1: 93, 2: 90, 3: 85, 4: 85}
('corelli', 'op03n09d')
{3: 182, 4: 164, 1: 117, 2: 110}
('corelli', 'op03n10a')
{1: 67, 2: 59, 3: 51, 4: 51}
('corelli', 'op03n10b')
{1: 271, 2: 237, 3: 234, 4: 232}
('corelli', 'op03n10c')
{3: 23, 4: 23, 1: 23, 2: 20}
('corelli', 'op03n10d')
{1: 271, 2: 233, 3: 71, 4: 71}
('corelli', 'op03n11a')
{1: 84, 2: 79, 3: 62, 4: 62}
('corelli', 'op03n11b')
{3: 250, 4: 233, 1: 170, 2: 161}
('corelli', 'op03n11c')
{2: 62, 1: 62, 3: 61, 4: 61}
('corelli', 'op03n11d')
{1: 112, 3: 97, 4: 97, 2: 91}
('corelli', 'op03n12a')
{1: 121, 2: 119, 3: 29, 4: 29}
('corelli', 'op03n12b')
{1: 148, 2: 142, 3: 41, 4: 41}
('corelli', 'op03n12c')
{3: 41, 4: 41, 1: 34, 2: 26}
('corelli', 'op03n12d')
{3: 316, 4: 182, 2: 96, 1: 95}
('corelli', 'op03n12e')
{1: 184, 2: 164, 3: 123, 4: 72}
('corelli', 'op03n12f')
{1: 301, 2: 273, 3: 235, 4: 171}
('corelli', 'op03n12g')
{3: 197, 4: 197, 1: 192, 2: 172}
('corelli', 'op04n01a')
{1: 84, 3: 72, 4: 72, 2: 69}
('corelli', 'op04n01b')
{3: 156, 1: 156, 2: 108}
('corelli', 'op04n01c')
{3: 82, 2: 79, 1: 72}
('corelli', 'op04n01d')
{3: 258, 4: 258, 1: 112, 2: 91}
('corelli', 'op04n02a')
{3: 131, 4: 131, 2: 76, 1: 74}
('corelli', 'op04n02b')
{1: 135, 3: 132, 2: 115}
('corelli', 'op04n02c')
{3: 11, 2: 11, 1: 11}
('corelli', 'op04n02d')
{1: 131, 2: 113, 3: 111, 4: 111}
('corelli', 'op04n03a')
{1: 104, 2: 88, 3: 80, 4: 80}
('corelli', 'op04n03b')
{1: 263, 3: 113, 4: 113, 2: 107}
('corelli', 'op04n03c')
{3: 84, 4: 84, 1: 58, 2: 47}
('corelli', 'op04n03d')
{1: 140, 3: 122, 4: 122, 2: 98}
('corelli', 'op04n04a')
{3: 91, 4: 91, 1: 62, 2: 60}
('corelli', 'op04n04b')
{1: 116, 2: 94, 3: 87, 4: 87}
('corelli', 'op04n04c')
{3: 116, 4: 116, 1: 86, 2: 86}
('corelli', 'op04n04d')
{1: 304, 3: 178, 2: 166}
('corelli', 'op04n05a')
{3: 112, 4: 112, 1: 95, 2: 86}
('corelli', 'op04n05b')
{3: 213, 4: 213, 1: 117, 2: 107}
('corelli', 'op04n05c')
{1: 89, 2: 84, 3: 75, 4: 75}
('corelli', 'op04n05d')
{1: 40, 2: 34, 3: 34, 4: 34}
('corelli', 'op04n06a')
{3: 39, 1: 28, 2: 20}
('corelli', 'op04n06b')
{1: 91, 2: 84, 3: 44}
('corelli', 'op04n06c')
{1: 18, 2: 16, 3: 15}
('corelli', 'op04n06d')
{1: 116, 2: 93, 3: 76}
('corelli', 'op04n06e')
{1: 22, 2: 21, 3: 20}
('corelli', 'op04n06f')
{1: 138, 2: 87, 3: 66, 4: 66}
('corelli', 'op04n06g')
{1: 226, 2: 139, 3: 105, 4: 105}
('corelli', 'op04n07a')
{1: 98, 3: 85, 4: 85, 2: 80}
('corelli', 'op04n07b')
{1: 111, 3: 99, 2: 93}
('corelli', 'op04n07c')
{3: 12, 2: 12, 1: 12}
('corelli', 'op04n07d')
{3: 164, 4: 164, 2: 59, 1: 49}
('corelli', 'op04n07e')
{1: 309, 3: 88, 4: 88, 2: 78}
('corelli', 'op04n08a')
{1: 90, 2: 85, 3: 80, 4: 80}
('corelli', 'op04n08b')
{3: 342, 4: 342, 2: 87, 1: 85}
('corelli', 'op04n08c')
{3: 85, 4: 85, 2: 47, 1: 47}
('corelli', 'op04n09a')
{2: 111, 1: 105, 3: 78, 4: 78}
('corelli', 'op04n09b')
{1: 175, 2: 147, 3: 120, 4: 120}
('corelli', 'op04n09c')
{1: 45, 3: 39, 4: 39, 2: 37}
('corelli', 'op04n09d')
{1: 258, 2: 220, 4: 143, 3: 140}
('corelli', 'op04n10a')
{3: 4, 1: 4, 2: 4}
('corelli', 'op04n10b')
{1: 304, 3: 186, 2: 163}
('corelli', 'op04n10c')
{2: 8, 3: 7, 1: 7}
('corelli', 'op04n10d')
{3: 51, 4: 51, 1: 45, 2: 38}
('corelli', 'op04n10e')
{1: 162, 3: 140, 4: 140, 2: 126}
('corelli', 'op04n11a')
{3: 153, 4: 153, 1: 109, 2: 105}
('corelli', 'op04n11b')
{3: 238, 4: 238, 1: 234, 2: 175}
('corelli', 'op04n11c')
{3: 268, 4: 268, 1: 121, 2: 116}
('corelli', 'op04n12a')
{1: 90, 3: 80, 4: 80, 2: 75}
('corelli', 'op04n12b')
{1: 296, 3: 110, 4: 110, 2: 105}
('corelli', 'op04n12c')
{1: 207, 2: 61, 3: 58, 4: 58}
('liszt_pelerinage', '161.04_Sonetto_47_del_Petrarca')
{1: 1076, 2: 628, 3: 42, 4: 29}
('liszt_pelerinage', '161.07_Apres_une_lecture_du_Dante')
{1: 6638, 2: 5181, 3: 50}
('liszt_pelerinage', '162.01_Gondoliera')
{3: 1745, 4: 955}
# Note counts per piece and staff, restricted to notes on staves with a number greater than 2.
all_notes[all_notes.staff > 2].groupby(level=[0,1]).staff.value_counts()
corpus fname staff
corelli op01n01a 3 72
4 70
op01n01b 3 143
4 116
op01n01c 3 90
...
liszt_pelerinage 161.04_Sonetto_47_del_Petrarca 3 42
4 29
161.07_Apres_une_lecture_du_Dante 3 50
162.01_Gondoliera 3 1745
4 955
Name: staff, Length: 287, dtype: int64